The data was extracted from https://cov-lineages.org/lineage_list.html.

Data Handling

# Load the lineage table exported from cov-lineages.org.
csv_path <- "/Users/felixbarenysmarimon/Desktop/PROJECT/mutations covid/covid_variants.csv"
covid_rep <- read.csv(csv_path)

# Drop the columns that are not used in the analysis (selected by position).
unused_cols <- c(1, 2, 4, 6, 8, 10, 12, 14, 15, 16)
covid_rep <- covid_rep[, -unused_cols]

# Parse the earliest-detection date and sort the lineages chronologically
# (rows whose date is NA end up last).
covid_rep$Earliest.date <- as.Date(covid_rep$Earliest.date)
chrono_order <- order(covid_rep$Earliest.date)
covid_rep <- covid_rep[chrono_order, ]

# Establish which lineages are the worrying variants and label each one with
# its WHO name; every other lineage is labelled "No".
# The labels are looked up per lineage rather than assigned by row position,
# so the result does not depend on the row order of covid_rep or on all four
# lineages being present in the data.
worring <- c("B.1.351 ", "P.1 ", "B.1.617.2 ", "B.1.1.529 ")
worry_labels <- c("Beta", "Gamma", "Delta", "Omicron")
# Whitespace is trimmed on both sides so the lookup works whether or not the
# Lineage column carries the trailing space present in the keys above.
lineage_idx <- match(trimws(as.character(covid_rep$Lineage)), trimws(worring))
covid_rep$worry <- "No"
covid_rep$worry[!is.na(lineage_idx)] <- worry_labels[lineage_idx[!is.na(lineage_idx)]]

# Set the alpha (opacity) value used when plotting: the worrying lineages
# get 1, every other lineage gets 0.1.
worring <- c("B.1.351 ", "P.1 ", "B.1.617.2 ", "B.1.1.529 ")
is_worrying <- covid_rep$Lineage %in% worring
covid_rep$alpha <- 0.1
covid_rep$alpha[is_worrying] <- 1


# Eliminate rows whose earliest date is NA.
# A logical mask is used instead of `-which(...)`: when no date is NA,
# `which()` returns integer(0) and `df[-integer(0), ]` would drop every row.
covid_rep <- covid_rep[!is.na(covid_rep$Earliest.date), ]

Difference in days between mutations

# Days elapsed between each lineage's earliest date and the previous one
# (rows are already sorted by date); the first row has no predecessor, so it
# gets 0.
covid_rep$diff_time <- c(
  0,
  as.numeric(diff(covid_rep$Earliest.date), units = "days")
)

# Histogram of the gaps. The column is referenced by its full name: the
# abbreviated `diff_ti` only worked through `$` partial matching.
hist(
  covid_rep$diff_time,
  xlim = c(0, 12),
  breaks = 40,
  xlab = "Difference between mutations",
  main = "Histogram"
)

ggplot2

library(plotly)
library(tidyverse)
library(htmlwidgets)
# Scatter plot of the gap (in days) to the previous lineage against the
# lineage's earliest date. Worrying variants are coloured by name and drawn
# opaque; all other lineages are black and faded. The `text` aesthetic
# supplies the description and most common countries for the interactive
# tooltip.
scatterPlot <- covid_rep %>%
  ggplot(aes(
    x = Earliest.date,
    y = diff_time,
    text = paste0(
      "Description: ", Description, "\n",
      "Most common countries: ", Most.common.countries, "\n"
    )
  )) +
  geom_point(aes(colour = worry, alpha = alpha)) +
  scale_color_manual(values = c(
    "Beta" = "green",
    "Gamma" = "orange",
    "Delta" = "red",
    "Omicron" = "blue",
    "No" = "black"
  )) +
  # This `+` chain must be unbroken: without the `+` after
  # scale_color_manual(), labs() and theme_classic() are evaluated as a
  # separate expression and never reach the plot.
  labs(
    x = "Time",
    y = "days between variants",
    title = "Covid variants"
  ) +
  theme_classic()

# Convert the static ggplot into an interactive plotly widget.
ggplotly(scatterPlot)

As we can see above, the time between mutations starts to increase when vaccination of the population speeds up, which could indicate the end of the pandemic.